import os

import numpy as np
import pandas as pd

# Generate a large test file (1,000,000 rows): sequential integer ids starting
# at 1 plus one random float per row drawn from np.random.rand.
csv_path = 'data/test_chunk.csv'
n_rows = 1000000  # single source of truth for both the id range and the value count
large_data = pd.DataFrame({'id': range(1, n_rows + 1), 'value': np.random.rand(n_rows)})
# DataFrame.to_csv does not create missing parent directories, so make sure
# the target directory exists before writing.
os.makedirs(os.path.dirname(csv_path), exist_ok=True)
large_data.to_csv(csv_path, index=False)

# Read the file back in fixed-size chunks, accumulating the 'value' column sum
# one chunk at a time instead of loading the whole file at once.
chunk_size = 100000
total_sum = 0
for chunk in pd.read_csv(csv_path, chunksize=chunk_size):
    total_sum += chunk['value'].sum()

# Verify consistency: the chunked sum must match the sum of the in-memory
# frame. A small absolute tolerance absorbs floating-point differences from
# the CSV round trip and the different summation order.
expected_sum = large_data['value'].sum()
assert abs(total_sum - expected_sum) < 1e-6, (
    f'chunked sum {total_sum} differs from in-memory sum {expected_sum}'
)